import pandas as pd
pd.read_csv(r"C:\Users\MD Talha Mobashshir\Downloads\india-usa_innings_data.csv")
| batter | bowler | non_striker | runs_batter | runs_extras | runs_total | wickets_0_player_out | wickets_0_kind | team | over | ... | wickets_0_fielders_0_name | review_by | review_umpire | review_batter | review_decision | review_type | extras_legbyes | wickets_0_fielders_1_name | extras_noballs | extras_penalty | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Shayan Jahangir | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | Shayan Jahangir | lbw | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 1 | 1 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | AGS Gous | Arshdeep Singh | SR Taylor | 2 | 0 | 2 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 231 | SA Yadav | SN Netravalkar | S Dube | 0 | 0 | 0 | NaN | NaN | India | 17 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 232 | SA Yadav | SN Netravalkar | S Dube | 1 | 0 | 1 | NaN | NaN | India | 17 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 233 | SA Yadav | Ali Khan | S Dube | 1 | 0 | 1 | NaN | NaN | India | 18 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 234 | S Dube | Ali Khan | SA Yadav | 0 | 1 | 1 | NaN | NaN | India | 18 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 235 | S Dube | Ali Khan | SA Yadav | 2 | 0 | 2 | NaN | NaN | India | 18 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
236 rows × 21 columns
data=pd.read_csv(r"C:\Users\MD Talha Mobashshir\Downloads\india-usa_innings_data.csv")
data.head()
| batter | bowler | non_striker | runs_batter | runs_extras | runs_total | wickets_0_player_out | wickets_0_kind | team | over | ... | wickets_0_fielders_0_name | review_by | review_umpire | review_batter | review_decision | review_type | extras_legbyes | wickets_0_fielders_1_name | extras_noballs | extras_penalty | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Shayan Jahangir | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | Shayan Jahangir | lbw | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 0 | 0 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | AGS Gous | Arshdeep Singh | SR Taylor | 0 | 1 | 1 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | AGS Gous | Arshdeep Singh | SR Taylor | 2 | 0 | 2 | NaN | NaN | United States of America | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 21 columns
data.shape
(236, 21)
data.columns
Index(['batter', 'bowler', 'non_striker', 'runs_batter', 'runs_extras',
'runs_total', 'wickets_0_player_out', 'wickets_0_kind', 'team', 'over',
'extras_wides', 'wickets_0_fielders_0_name', 'review_by',
'review_umpire', 'review_batter', 'review_decision', 'review_type',
'extras_legbyes', 'wickets_0_fielders_1_name', 'extras_noballs',
'extras_penalty'],
dtype='object')
missing_values=data.isnull().sum() #checking for missing values in the dataset
data_types=data.dtypes #checking the data types of the columns
missing_values
batter 0 bowler 0 non_striker 0 runs_batter 0 runs_extras 0 runs_total 0 wickets_0_player_out 225 wickets_0_kind 225 team 0 over 0 extras_wides 231 wickets_0_fielders_0_name 228 review_by 235 review_umpire 235 review_batter 235 review_decision 235 review_type 235 extras_legbyes 234 wickets_0_fielders_1_name 235 extras_noballs 235 extras_penalty 235 dtype: int64
data_types
batter object bowler object non_striker object runs_batter int64 runs_extras int64 runs_total int64 wickets_0_player_out object wickets_0_kind object team object over int64 extras_wides float64 wickets_0_fielders_0_name object review_by object review_umpire object review_batter object review_decision object review_type object extras_legbyes float64 wickets_0_fielders_1_name object extras_noballs float64 extras_penalty float64 dtype: object
#the data has null values in various columns but in such dataset, even null values have a meaning,so it ia left as it is
#total runs scored by each team
data.groupby("team")["runs_total"].sum()
team India 111 United States of America 110 Name: runs_total, dtype: int64
total_runs=data.groupby("team")["runs_total"].sum()
total_runs
team India 111 United States of America 110 Name: runs_total, dtype: int64
#total wickets taken by each team
data["wickets_0_player_out"].notna().groupby(data["team"]).sum()
team India 3 United States of America 8 Name: wickets_0_player_out, dtype: int64
total_wickets=data["wickets_0_player_out"].notna().groupby(data["team"]).sum()
total_wickets
team India 3 United States of America 8 Name: wickets_0_player_out, dtype: int64
# total extras by each team
data[["team","runs_extras","extras_wides","extras_legbyes","extras_noballs","extras_penalty"]].groupby("team").sum()
| runs_extras | extras_wides | extras_legbyes | extras_noballs | extras_penalty | |
|---|---|---|---|---|---|
| team | |||||
| India | 9 | 2.0 | 1.0 | 1.0 | 5.0 |
| United States of America | 8 | 7.0 | 1.0 | 0.0 | 0.0 |
total_extras=data[["team","runs_extras","extras_wides","extras_legbyes","extras_noballs","extras_penalty"]].groupby("team").sum()
# runs scored by each batter
data.groupby("batter")["runs_batter"].sum()
batter AGS Gous 2 Aaron Jones 11 CJ Anderson 15 Harmeet Singh 10 Jasdeep Singh 2 NR Kumar 27 RG Sharma 3 RR Pant 18 S Dube 31 SA Yadav 50 SC van Schalkwyk 11 SR Taylor 24 Shayan Jahangir 0 V Kohli 0 Name: runs_batter, dtype: int64
batter_runs=data.groupby("batter")["runs_batter"].sum()
# balls faced by each batter
data.groupby("batter").size()
batter AGS Gous 6 Aaron Jones 22 CJ Anderson 12 Harmeet Singh 10 Jasdeep Singh 7 NR Kumar 24 RG Sharma 6 RR Pant 20 S Dube 37 SA Yadav 49 SC van Schalkwyk 10 SR Taylor 31 Shayan Jahangir 1 V Kohli 1 dtype: int64
balls_faced=data.groupby("batter").size()
#strike rate of each batter
strike_rate=(batter_runs/balls_faced)*100
strike_rate
batter AGS Gous 33.333333 Aaron Jones 50.000000 CJ Anderson 125.000000 Harmeet Singh 100.000000 Jasdeep Singh 28.571429 NR Kumar 112.500000 RG Sharma 50.000000 RR Pant 90.000000 S Dube 83.783784 SA Yadav 102.040816 SC van Schalkwyk 110.000000 SR Taylor 77.419355 Shayan Jahangir 0.000000 V Kohli 0.000000 dtype: float64
#boundaries hit by each batter
data[(data["runs_batter"]==4) | (data["runs_batter"]==6)].groupby(["batter","runs_batter"]).size().unstack(fill_value=0)
| runs_batter | 4 | 6 |
|---|---|---|
| batter | ||
| Aaron Jones | 0 | 1 |
| CJ Anderson | 1 | 1 |
| Harmeet Singh | 0 | 1 |
| NR Kumar | 2 | 1 |
| RR Pant | 1 | 1 |
| S Dube | 1 | 1 |
| SA Yadav | 2 | 2 |
| SC van Schalkwyk | 1 | 0 |
| SR Taylor | 0 | 2 |
boundaries=data[(data["runs_batter"]==4) | (data["runs_batter"]==6)].groupby(["batter","runs_batter"]).size().unstack(fill_value=0)
#wickets taken by each bowler
data["wickets_0_player_out"].notna().groupby(data["bowler"]).sum()
bowler AR Patel 1 Ali Khan 1 Arshdeep Singh 4 CJ Anderson 0 HH Pandya 2 JJ Bumrah 0 Jasdeep Singh 0 Mohammed Siraj 1 S Dube 0 SC van Schalkwyk 0 SN Netravalkar 2 Name: wickets_0_player_out, dtype: int64
wickets_taken=data["wickets_0_player_out"].notna().groupby(data["bowler"]).sum()
#runs conceded by each bowler
data.groupby("bowler")["runs_total"].sum()
bowler AR Patel 25 Ali Khan 22 Arshdeep Singh 9 CJ Anderson 22 HH Pandya 15 JJ Bumrah 25 Jasdeep Singh 24 Mohammed Siraj 25 S Dube 11 SC van Schalkwyk 25 SN Netravalkar 18 Name: runs_total, dtype: int64
runs_conceded=data.groupby("bowler")["runs_total"].sum()
#balls bowled by each bowler
data.groupby("bowler").size()
bowler AR Patel 19 Ali Khan 21 Arshdeep Singh 25 CJ Anderson 19 HH Pandya 24 JJ Bumrah 25 Jasdeep Singh 25 Mohammed Siraj 24 S Dube 6 SC van Schalkwyk 24 SN Netravalkar 24 dtype: int64
balls_bowled=data.groupby("bowler").size()
#economy rate of each bowler
economy_rate=runs_conceded/(balls_bowled/6)
economy_rate
bowler AR Patel 7.894737 Ali Khan 6.285714 Arshdeep Singh 2.160000 CJ Anderson 6.947368 HH Pandya 3.750000 JJ Bumrah 6.000000 Jasdeep Singh 5.760000 Mohammed Siraj 6.250000 S Dube 11.000000 SC van Schalkwyk 6.250000 SN Netravalkar 4.500000 dtype: float64
#dot balls bowled by each bowler
data[data["runs_total"]==0].groupby("bowler").size()
bowler AR Patel 5 Ali Khan 7 Arshdeep Singh 17 CJ Anderson 8 HH Pandya 18 JJ Bumrah 14 Jasdeep Singh 11 Mohammed Siraj 11 S Dube 3 SC van Schalkwyk 8 SN Netravalkar 13 dtype: int64
dot_balls=data[data["runs_total"]==0].groupby("bowler").size()
#batter stats
batter_stats=pd.DataFrame({
"Runs": batter_runs,
"Balls Faced": balls_faced,
"Strike Rate": strike_rate
}).join(boundaries)
batter_stats
| Runs | Balls Faced | Strike Rate | 4 | 6 | |
|---|---|---|---|---|---|
| batter | |||||
| AGS Gous | 2 | 6 | 33.333333 | NaN | NaN |
| Aaron Jones | 11 | 22 | 50.000000 | 0.0 | 1.0 |
| CJ Anderson | 15 | 12 | 125.000000 | 1.0 | 1.0 |
| Harmeet Singh | 10 | 10 | 100.000000 | 0.0 | 1.0 |
| Jasdeep Singh | 2 | 7 | 28.571429 | NaN | NaN |
| NR Kumar | 27 | 24 | 112.500000 | 2.0 | 1.0 |
| RG Sharma | 3 | 6 | 50.000000 | NaN | NaN |
| RR Pant | 18 | 20 | 90.000000 | 1.0 | 1.0 |
| S Dube | 31 | 37 | 83.783784 | 1.0 | 1.0 |
| SA Yadav | 50 | 49 | 102.040816 | 2.0 | 2.0 |
| SC van Schalkwyk | 11 | 10 | 110.000000 | 1.0 | 0.0 |
| SR Taylor | 24 | 31 | 77.419355 | 0.0 | 2.0 |
| Shayan Jahangir | 0 | 1 | 0.000000 | NaN | NaN |
| V Kohli | 0 | 1 | 0.000000 | NaN | NaN |
#bowlers_stats
bowlers_stats=pd.DataFrame({
"Wickets":wickets_taken,
"Runs conceded":runs_conceded,
"Balls Bowled":balls_bowled,
"Economy rate":economy_rate,
"Dot balls":dot_balls})
bowlers_stats
| Wickets | Runs conceded | Balls Bowled | Economy rate | Dot balls | |
|---|---|---|---|---|---|
| bowler | |||||
| AR Patel | 1 | 25 | 19 | 7.894737 | 5 |
| Ali Khan | 1 | 22 | 21 | 6.285714 | 7 |
| Arshdeep Singh | 4 | 9 | 25 | 2.160000 | 17 |
| CJ Anderson | 0 | 22 | 19 | 6.947368 | 8 |
| HH Pandya | 2 | 15 | 24 | 3.750000 | 18 |
| JJ Bumrah | 0 | 25 | 25 | 6.000000 | 14 |
| Jasdeep Singh | 0 | 24 | 25 | 5.760000 | 11 |
| Mohammed Siraj | 1 | 25 | 24 | 6.250000 | 11 |
| S Dube | 0 | 11 | 6 | 11.000000 | 3 |
| SC van Schalkwyk | 0 | 25 | 24 | 6.250000 | 8 |
| SN Netravalkar | 2 | 18 | 24 | 4.500000 | 13 |
total_runs
team India 111 United States of America 110 Name: runs_total, dtype: int64
total_extras
| runs_extras | extras_wides | extras_legbyes | extras_noballs | extras_penalty | |
|---|---|---|---|---|---|
| team | |||||
| India | 9 | 2.0 | 1.0 | 1.0 | 5.0 |
| United States of America | 8 | 7.0 | 1.0 | 0.0 | 0.0 |
total_wickets
team India 3 United States of America 8 Name: wickets_0_player_out, dtype: int64
import plotly.graph_objects as go
data[data["team"]=="India"].groupby("over")["runs_total"].sum().cumsum()
over 0 2 1 10 2 12 3 16 4 25 5 33 6 36 7 39 8 41 9 47 10 53 11 55 12 60 13 67 14 81 15 87 16 102 17 107 18 111 Name: runs_total, dtype: int64
india_runs_progression=data[data["team"]=="India"].groupby("over")["runs_total"].sum().cumsum()
usa_runs_progression=data[data["team"]=="United States of America"].groupby("over")["runs_total"].sum().cumsum()
fig=go.Figure()
fig.add_trace(go.Scatter(x=india_runs_progression.index,y=india_runs_progression.values,mode="lines+markers", name="India"))
fig.add_trace(go.Scatter(x=usa_runs_progression.index,y=usa_runs_progression.values,mode="lines+markers", name="USA"))
fig.update_layout(title="Runs Progression Over Overs",xaxis_title="Overs",yaxis_title="Cumulative Runs",legend_title="Teams",template="plotly_white")
fig.show()
data[(data["team"]=="India") & (data["wickets_0_player_out"].notna())].groupby("over").size()
over 0 1 2 1 7 1 dtype: int64
india_wickets=data[(data["team"]=="India") & (data["wickets_0_player_out"].notna())].groupby("over").size()
usa_wickets=data[(data["team"]=="United States of America") & (data["wickets_0_player_out"].notna())].groupby("over").size()
fig=go.Figure()
fig.add_trace(go.Bar(x=india_wickets.index,y=india_wickets.values,name="India",marker_color="blue",opacity=0.7))
fig.add_trace(go.Bar(x=usa_wickets.index,y=usa_wickets.values,name="USA",marker_color="red",opacity=0.7))
fig.update_layout(title="Wickets Timeline",xaxis_title="Overs",yaxis_title="No. of Wickets",barmode="group",template="plotly_white",legend_title="Teams")
fig.show()
import plotly.express as px
fig=px.bar(batter_stats,x=batter_stats.index,y="Runs",title="Run Distribution by Batters",labels={"x":"Batter","Runs":"Runs Scored"},template="plotly_white")
fig.update_layout(xaxis_title="Batter",yaxis_title="Runs Scored",xaxis=dict(tickangle=90))
fig.show()
fig= go.Figure()
fig.add_trace(go.Scatter(x=bowlers_stats["Economy rate"],y=bowlers_stats["Wickets"],mode="markers+text",
text=bowlers_stats.index,textposition="top center",textfont=dict(family="sans serif",size=12,color="black"),
marker=dict(color="red",size=10),
name="Bowlers"))
fig.update_layout(title="Bowling Perfomance",xaxis_title="Economy Rate",
yaxis_title="Wickets Taken",template="plotly_white",autosize=False,width=800,height=600)
fig.show()
#separate data for India and USA
data[data["team"]=="India"].groupby(["over","batter","non_striker"])["runs_total"].sum().reset_index()
| over | batter | non_striker | runs_total | |
|---|---|---|---|---|
| 0 | 0 | RG Sharma | V Kohli | 1 |
| 1 | 0 | RR Pant | RG Sharma | 1 |
| 2 | 0 | V Kohli | RG Sharma | 0 |
| 3 | 1 | RG Sharma | RR Pant | 2 |
| 4 | 1 | RR Pant | RG Sharma | 6 |
| 5 | 2 | RG Sharma | RR Pant | 0 |
| 6 | 2 | RR Pant | SA Yadav | 1 |
| 7 | 2 | SA Yadav | RR Pant | 1 |
| 8 | 3 | RR Pant | SA Yadav | 2 |
| 9 | 3 | SA Yadav | RR Pant | 2 |
| 10 | 4 | RR Pant | SA Yadav | 0 |
| 11 | 4 | SA Yadav | RR Pant | 9 |
| 12 | 5 | RR Pant | SA Yadav | 6 |
| 13 | 5 | SA Yadav | RR Pant | 2 |
| 14 | 6 | RR Pant | SA Yadav | 1 |
| 15 | 6 | SA Yadav | RR Pant | 2 |
| 16 | 7 | RR Pant | SA Yadav | 2 |
| 17 | 7 | S Dube | SA Yadav | 0 |
| 18 | 7 | SA Yadav | RR Pant | 1 |
| 19 | 8 | S Dube | SA Yadav | 1 |
| 20 | 8 | SA Yadav | S Dube | 1 |
| 21 | 9 | S Dube | SA Yadav | 3 |
| 22 | 9 | SA Yadav | S Dube | 3 |
| 23 | 10 | S Dube | SA Yadav | 6 |
| 24 | 11 | S Dube | SA Yadav | 1 |
| 25 | 11 | SA Yadav | S Dube | 1 |
| 26 | 12 | S Dube | SA Yadav | 3 |
| 27 | 12 | SA Yadav | S Dube | 2 |
| 28 | 13 | S Dube | SA Yadav | 2 |
| 29 | 13 | SA Yadav | S Dube | 5 |
| 30 | 14 | S Dube | SA Yadav | 13 |
| 31 | 14 | SA Yadav | S Dube | 1 |
| 32 | 15 | S Dube | SA Yadav | 4 |
| 33 | 15 | SA Yadav | S Dube | 2 |
| 34 | 16 | S Dube | SA Yadav | 1 |
| 35 | 16 | SA Yadav | S Dube | 14 |
| 36 | 17 | S Dube | SA Yadav | 1 |
| 37 | 17 | SA Yadav | S Dube | 4 |
| 38 | 18 | S Dube | SA Yadav | 3 |
| 39 | 18 | SA Yadav | S Dube | 1 |
india_partnership_data=data[data["team"]=="India"].groupby(["over","batter","non_striker"])["runs_total"].sum().reset_index()
usa_partnership_data=data[data["team"]=="United States of America"].groupby(["over","batter","non_striker"])["runs_total"].sum().reset_index()
#creating pivot tables for better visualization
india_partnership_data.pivot(index="over",columns=["batter","non_striker"],values="runs_total").fillna(0)
| batter | RG Sharma | RR Pant | V Kohli | RG Sharma | RR Pant | SA Yadav | S Dube | SA Yadav |
|---|---|---|---|---|---|---|---|---|
| non_striker | V Kohli | RG Sharma | RG Sharma | RR Pant | SA Yadav | RR Pant | SA Yadav | S Dube |
| over | ||||||||
| 0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 6.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 2.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 9.0 | 0.0 | 0.0 |
| 5 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 2.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 2.0 | 0.0 | 0.0 |
| 7 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1.0 | 0.0 | 0.0 |
| 8 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
| 9 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 3.0 |
| 10 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 0.0 |
| 11 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
| 12 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 2.0 |
| 13 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 5.0 |
| 14 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13.0 | 1.0 |
| 15 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 2.0 |
| 16 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 14.0 |
| 17 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 4.0 |
| 18 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 1.0 |
india_partnership_pivot=india_partnership_data.pivot(index="over",columns=["batter","non_striker"],values="runs_total").fillna(0)
usa_partnership_pivot=usa_partnership_data.pivot(index="over",columns=["batter","non_striker"],values="runs_total").fillna(0)
#converting the pivot table to a long format
#resetting the index first
india_partnership_pivot_reset = india_partnership_pivot.reset_index()
# Flatten the MultiIndex columns to ensure 'over' is a regular column
india_partnership_pivot_reset.columns = india_partnership_pivot_reset.columns.map('_'.join).str.strip('_')
# Converting the pivot table to a long format
india_partnership_long = india_partnership_pivot_reset.melt(id_vars=["over"], var_name="batter_non_striker", value_name="runs_total")
# Splitting the combined 'batter_non_striker' column into 'batter' and 'non_striker'
india_partnership_long[['batter', 'non_striker']] = india_partnership_long['batter_non_striker'].str.split('_', expand=True)
india_partnership_long = india_partnership_long.drop(columns=['batter_non_striker'])
india_partnership_long
| over | runs_total | batter | non_striker | |
|---|---|---|---|---|
| 0 | 0 | 1.0 | RG Sharma | V Kohli |
| 1 | 1 | 0.0 | RG Sharma | V Kohli |
| 2 | 2 | 0.0 | RG Sharma | V Kohli |
| 3 | 3 | 0.0 | RG Sharma | V Kohli |
| 4 | 4 | 0.0 | RG Sharma | V Kohli |
| ... | ... | ... | ... | ... |
| 147 | 14 | 1.0 | SA Yadav | S Dube |
| 148 | 15 | 2.0 | SA Yadav | S Dube |
| 149 | 16 | 14.0 | SA Yadav | S Dube |
| 150 | 17 | 4.0 | SA Yadav | S Dube |
| 151 | 18 | 1.0 | SA Yadav | S Dube |
152 rows × 4 columns
#Creating a stacked bar chart
fig=go.Figure()
#adding bars for each partnership
for (batter,non_striker) in india_partnership_pivot.columns:
partnership_data=india_partnership_long[(india_partnership_long["batter"]==batter) & (india_partnership_long["non_striker"]==non_striker)]
fig.add_trace(go.Bar(x=partnership_data["over"],y=partnership_data["runs_total"],name=f'{batter} & {non_striker}'))
fig.update_layout(title="Partnership Contributions - India",xaxis_title="Over",yaxis_title="Runs",barmode="stack",template="plotly_white",
legend_title="Partnership",legend=dict(x=1.05,y=1,traceorder="normal",font=dict(size=10)),autosize=False,width=900,height=600)
fig.show()
#resetting the index first
usa_partnership_pivot_reset = usa_partnership_pivot.reset_index()
# Flatten the MultiIndex columns to ensure 'over' is a regular column
usa_partnership_pivot_reset.columns = usa_partnership_pivot_reset.columns.map('_'.join).str.strip('_')
# Converting the pivot table to a long format
usa_partnership_long = usa_partnership_pivot_reset.melt(id_vars=["over"], var_name="batter_non_striker", value_name="runs_total")
# Splitting the combined 'batter_non_striker' column into 'batter' and 'non_striker'
usa_partnership_long[['batter', 'non_striker']] = usa_partnership_long['batter_non_striker'].str.split('_', expand=True)
usa_partnership_long = usa_partnership_long.drop(columns=['batter_non_striker'])
#creating a stacked bar chart
fig=go.Figure()
#adding bars for each partnership
for (batter,non_striker) in usa_partnership_pivot.columns:
partnership_data=usa_partnership_long[(usa_partnership_long["batter"]==batter) & (usa_partnership_long["non_striker"]==non_striker)]
fig.add_trace(go.Bar(x=partnership_data["over"],y=partnership_data["runs_total"],name=f'{batter} & {non_striker}'))
fig.update_layout(title="Partnership Contributions - USA",xaxis_title="Over",yaxis_title="Runs",barmode="stack",template="plotly_white",
legend_title="Partnership",legend=dict(x=1.05,y=1,traceorder="normal",font=dict(size=10)),autosize=False,width=900,height=600)
fig.show()
#cumulative runs for both teams over the overs
data[data["team"]=="India"].groupby("over")["runs_total"].sum().cumsum()
over 0 2 1 10 2 12 3 16 4 25 5 33 6 36 7 39 8 41 9 47 10 53 11 55 12 60 13 67 14 81 15 87 16 102 17 107 18 111 Name: runs_total, dtype: int64
india_cumulative_runs=data[data["team"]=="India"].groupby("over")["runs_total"].sum().cumsum()
usa_cumulative_runs=data[data["team"]=="United States of America"].groupby("over")["runs_total"].sum().cumsum()
#extracting key moments where wickets fell or significant runs were scored
india_key_moments=data[(data["team"]=="India") & data["wickets_0_player_out"].notna()]
usa_key_moments=data[(data["team"]=="United States of America") & data["wickets_0_player_out"].notna()]
#significant runs scored by each team
india_significant_runs=data[(data["team"]=="India") & (data["runs_total"]>=4)]
usa_significant_runs=data[(data["team"]=="United States of America") & (data["runs_total"]>=4)]
data[(data["team"]=="United States of America") & data["wickets_0_player_out"].notna()].groupby("over").size().cumsum()
over 0 2 7 3 11 4 14 5 16 6 17 7 19 8 dtype: int64
usa_wickets_fall=data[(data["team"]=="United States of America") & data["wickets_0_player_out"].notna()].groupby("over").size().cumsum()
fig=go.Figure()
fig.add_trace(go.Scatter(x=usa_cumulative_runs.index,
y=usa_cumulative_runs.values,
mode="lines+markers",
name="USA Cumulative Runs",
line=dict(color="blue")))
fig.add_trace(go.Scatter(x=usa_wickets_fall.index,
y=usa_cumulative_runs.loc[usa_wickets_fall.index],
mode="markers",
name="USA wickets",
marker=dict(color="red",size=10)))
# Add annotations for key moments
for _, row in usa_key_moments.iterrows():
fig.add_annotation(x=row["over"],y=usa_cumulative_runs.loc[row["over"]],
text=f"{row['batter']} ({row['over']})",
showarrow=True,arrowhead=2,ax=row["over"],ay=usa_cumulative_runs.loc[row["over"]] + 5,
arrowcolor="black")
fig.update_layout(title="USA Keys Moments in Innings",xaxis_title="Overs",yaxis_title="Cumulative Runs",template="plotly_white",
legend_title="USA Innings",autosize=False,width=900,height=600)
fig.show()
india_cumulative_runs=data[data["team"]=="India"].groupby("over")["runs_total"].sum().cumsum()
india_key_moments=data[(data["team"]=="India") & data["wickets_0_player_out"].notna()]
india_significant_runs=data[(data["team"]=="India") & (data["runs_total"]>=4)]
india_wickets_fall=data[(data["team"]=="India") & (data["wickets_0_player_out"].notna())].groupby("over").size().cumsum()
fig=go.Figure()
fig.add_trace(go.Scatter(x=india_cumulative_runs.index,y=india_cumulative_runs.values,mode="lines+markers",
name="India Cumulative Runs",line=dict(color="green")))
fig.add_trace(go.Scatter(x=india_wickets_fall.index,y=india_cumulative_runs.loc[india_wickets_fall.index],
mode="markers",name="India wickets",marker=dict(color="red",size=10)))
for _,row in india_key_moments.iterrows():
fig.add_annotation(x=row["over"],y=india_cumulative_runs.loc[row["over"]],
text=f'{row["batter"]}({row["over"]})',
showarrow=True,
arrowhead=2,
ax=row["over"],
ay=india_cumulative_runs.loc[row["over"]] + 5,
arrowcolor="black")
fig.update_layout(title="India Key Moments In Innings",xaxis_title="Overs",yaxis_title="Cumulative Runs",
template="plotly_white",legend_title="India Innings",autosize=False,width=900,height=600)
fig.show()
data[data["team"]=="India"].groupby("over")["runs_total"].sum().mean()
5.842105263157895
india_run_rate=data[data["team"]=="India"].groupby("over")["runs_total"].sum().mean()
usa_run_rate=data[data["team"]=="United States of America"].groupby("over")["runs_total"].sum().mean()
fig=go.Figure()
fig.add_trace(go.Bar(x=["India","USA"],y=[india_run_rate,usa_run_rate],marker_color=["green","blue"]))
fig.add_annotation(x="India",y=india_run_rate,text=f"{india_run_rate:2f}",showarrow=False,yshift=10)
fig.add_annotation(x="USA",y=usa_run_rate,text=f"{usa_run_rate:2f}",showarrow=False,yshift=10)
fig.update_layout(title="Comparison of Average Run Rate per Over",xaxis_title="Team",yaxis_title="Average Run Rate per Over",
template="plotly_white")
fig.show()
india_run_rate_per_over=data[data["team"]=="India"].groupby("over")["runs_total"].sum()
usa_run_rate_per_over=data[data["team"]=="United States of America"].groupby("over")["runs_total"].sum()
fig=go.Figure()
fig.add_trace(go.Scatter(x=india_run_rate_per_over.index,y=india_run_rate_per_over.values,mode="lines+markers",
name="India Run Rate",line=dict(color="green")))
fig.add_trace(go.Scatter(x=usa_run_rate_per_over.index,y=usa_run_rate_per_over.values,mode="lines+markers",
name="USA Run Rate",line=dict(color="blue")))
fig.update_layout(title="Comparison of Run Rate per Over",xaxis_title="Overs",yaxis_title="Runs",
template="plotly_white",legend_title="Runrate",autosize=False,width=1000,height=600)
fig.show()